
///////////////////////////////////////////////////////////////////////////////
//                                                                           //
//                   Long number - data string operations                    //
//                                                                           //
///////////////////////////////////////////////////////////////////////////////

#include "..\include.h"

///////////////////////////////////////////////////////////////////////////////
// fill data string

// Stores 'val' into the first 'len' words of 'dst'.
// Nothing is written when len <= 0.
void bignum::FillStr(buint* dst, buint val, bint len)
{
#ifdef X86

	// x64 build: the whole job is delegated to FillStr_x64
	FillStr_x64(dst, val, len);

#else // X86

	// Deliberately not unrolled by hand: per the original author's note,
	// an 8-words-per-pass variant was less effective than this plain loop,
	// which is compiled as "rep stos".
	for (bint i = 0; i < len; i++)
	{
		dst[i] = val;
	}

#endif // X86
}

///////////////////////////////////////////////////////////////////////////////
// copy data string in UP direction (must be dst < src if overlapped)

// Copies 'len' words from 'src' to 'dst', lowest index first.
// Words are moved one by one in ascending order, so overlapping buffers
// are only handled correctly when dst < src. No-op when len <= 0.
void bignum::CopyStr(buint* dst, const buint* src, bint len)
{
#ifdef X86

	// x64 build: delegate to CopyStr_x64
	CopyStr_x64(dst, src, len);

#else // X86

	bint rest = len; // words still to be copied

#ifdef OPTIMISE
	// bulk part: groups of 8 words
	while (rest >= 8)
	{
		for (int i = 0; i < 8; i++) dst[i] = src[i];
		dst += 8;
		src += 8;
		rest -= 8;
	}
#endif // OPTIMISE

	// remaining words (all of them when OPTIMISE is off)
	while (rest > 0)
	{
		*dst++ = *src++;
		rest--;
	}

#endif // X86
}

///////////////////////////////////////////////////////////////////////////////
// copy data string in DOWN direction (must be dst > src if overlapped)

// Copies 'len' words from 'src' to 'dst', highest index first.
// Words are moved one by one in descending order, so overlapping buffers
// are only handled correctly when dst > src. No-op when len <= 0.
void bignum::CopyDownStr(buint* dst, const buint* src, bint len)
{
#ifdef X86

	// x64 build: delegate to CopyDownStr_x64
	CopyDownStr_x64(dst, src, len);

#else // X86

	// start at the last word of both buffers
	buint* d = dst + (len - 1);
	const buint* s = src + (len - 1);
	bint rest = len; // words still to be copied

#ifdef OPTIMISE
	// bulk part: groups of 8 words, walking downward
	while (rest >= 8)
	{
		for (int i = 0; i < 8; i++) d[-i] = s[-i];
		d -= 8;
		s -= 8;
		rest -= 8;
	}
#endif // OPTIMISE

	// remaining words (all of them when OPTIMISE is off)
	while (rest > 0)
	{
		*d-- = *s--;
		rest--;
	}

#endif // X86
}

///////////////////////////////////////////////////////////////////////////////
// scan equal data string in UP direction (returns length of equal data)

// Counts how many consecutive words at the start of 'dst' are equal to 'val'.
// At most 'len' words are inspected; returns that count (0 when len <= 0).
bint bignum::ScanEquStr(const buint* dst, buint val, bint len)
{
#ifdef X86

	return ScanEquStr_x64(dst, val, len);

#else // X86

	bint done = 0; // number of leading words already known to equal 'val'

#ifdef OPTIMISE
	// bulk part: test 8 words per pass, leave at the first mismatch
	while (len - done >= 8)
	{
		for (int i = 0; i < 8; i++)
		{
			if (dst[done + i] != val) return done + i;
		}
		done += 8;
	}
#endif // OPTIMISE

	// remaining words (all of them when OPTIMISE is off)
	while ((done < len) && (dst[done] == val))
	{
		done++;
	}

	return done;

#endif // X86
}

///////////////////////////////////////////////////////////////////////////////
// scan not equal data string in UP direction (returns length of not equal data)

// Counts how many consecutive words at the start of 'dst' differ from 'val'.
// At most 'len' words are inspected; returns that count (0 when len <= 0).
bint bignum::ScanNEquStr(const buint* dst, buint val, bint len)
{
#ifdef X86

	return ScanNEquStr_x64(dst, val, len);

#else // X86

	bint done = 0; // number of leading words already known to differ from 'val'

#ifdef OPTIMISE
	// bulk part: test 8 words per pass, leave at the first match
	while (len - done >= 8)
	{
		for (int i = 0; i < 8; i++)
		{
			if (dst[done + i] == val) return done + i;
		}
		done += 8;
	}
#endif // OPTIMISE

	// remaining words (all of them when OPTIMISE is off)
	while ((done < len) && (dst[done] != val))
	{
		done++;
	}

	return done;

#endif // X86
}

///////////////////////////////////////////////////////////////////////////////
// shift data string left to higher bits (shift=0..63, inputs and outputs carry bits)

// Shifts the 'len'-word string 'src' left (towards higher bits) by 'shift'
// bits and stores the result in 'dst'. Processing runs from the highest
// word down to the lowest.
//   carry ... its low 'shift' bits are inserted at the bottom of the lowest word
//   returns . the 'shift' bits pushed out of the highest word (in the low bits)
// Special cases: len <= 0 returns 'carry' untouched; shift == 0 is a plain
// downward copy and returns 0.
buint bignum::LShiftStr(buint carry, int shift, buint* dst, buint* src, bint len)
{
	// empty string: the carry simply passes through
	if (len <= 0) return carry;

	// zero shift: no bits move between words, copy only
	if (shift == 0)
	{
		bignum::CopyDownStr(dst, src, len);
		return 0;
	}

#ifdef X86

	return LShiftStr_x64(carry, shift, dst, src, len);

#else // X86

	const int back = BIGBITS - shift; // complementary shift count
	bint rest = len - 1;              // word pairs still to be combined

	// start at the highest word
	buint* d = dst + rest;
	const buint* s = src + rest;

	const buint top = *s; // highest source word, supplies the output carry
	buint hi = top;       // word whose bits move up
	buint lo;             // next lower word, supplies the bits moved in

#ifdef OPTIMISE
	// bulk part: 8 result words per pass
	while (rest >= 8)
	{
		for (int i = 0; i < 8; i++)
		{
			lo = s[-1 - i];
			d[-i] = (hi << shift) | (lo >> back);
			hi = lo;
		}
		s -= 8;
		d -= 8;
		rest -= 8;
	}
#endif // OPTIMISE

	// remaining words except the lowest one
	while (rest > 0)
	{
		lo = s[-1];
		*d = (hi << shift) | (lo >> back);
		hi = lo;
		s--;
		d--;
		rest--;
	}

	// lowest word: the free bits come from the low 'shift' bits of 'carry'
	*d = (hi << shift) | ((carry << back) >> back);

	return top >> back;

#endif // X86
}

///////////////////////////////////////////////////////////////////////////////
// shift data string right to lower bits (shift=0..63, inputs and outputs carry bits)

// Shifts the 'len'-word string 'src' right (towards lower bits) by 'shift'
// bits and stores the result in 'dst'. Processing runs from the lowest
// word up to the highest.
//   carry ... its low 'shift' bits are inserted at the top of the highest word
//   returns . the 'shift' bits pushed out of the lowest word (in the low bits)
// Special cases: len <= 0 returns 'carry' untouched; shift == 0 is a plain
// upward copy and returns 0.
buint bignum::RShiftStr(buint carry, int shift, buint* dst, buint* src, bint len)
{
	// empty string: the carry simply passes through
	if (len <= 0) return carry;

	// zero shift: no bits move between words, copy only
	if (shift == 0)
	{
		bignum::CopyStr(dst, src, len);
		return 0;
	}

#ifdef X86

	return RShiftStr_x64(carry, shift, dst, src, len);

#else // X86

	const int back = BIGBITS - shift; // complementary shift count
	bint rest = len - 1;              // word pairs still to be combined

	const buint bottom = src[0]; // lowest source word, supplies the output carry
	buint lo = bottom;           // word whose bits move down
	buint hi;                    // next higher word, supplies the bits moved in

#ifdef OPTIMISE
	// bulk part: 8 result words per pass
	while (rest >= 8)
	{
		for (int i = 0; i < 8; i++)
		{
			hi = src[i + 1];
			dst[i] = (lo >> shift) | (hi << back);
			lo = hi;
		}
		src += 8;
		dst += 8;
		rest -= 8;
	}
#endif // OPTIMISE

	// remaining words except the highest one
	while (rest > 0)
	{
		hi = src[1];
		*dst = (lo >> shift) | (hi << back);
		lo = hi;
		src++;
		dst++;
		rest--;
	}

	// highest word: the free bits come from the low 'shift' bits of 'carry'
	*dst = (lo >> shift) | (carry << back);

	// keep only the low 'shift' bits of the original lowest word
	return (bottom << back) >> back;

#endif // X86
}

///////////////////////////////////////////////////////////////////////////////
// compare equal data string (returns length of equal data)

// Counts how many consecutive words, starting at index 0, are identical in
// 'num1' and 'num2'. At most 'len' words are compared; returns that count
// (0 when len <= 0).
bint bignum::CompEquStr(const buint* num1, const buint* num2, bint len)
{
#ifdef X86

	return CompEquStr_x64(num1, num2, len);

#else // X86

	bint same = 0; // number of leading word pairs already known to be equal

#ifdef OPTIMISE
	// bulk part: compare 8 word pairs per pass, leave at the first difference
	while (len - same >= 8)
	{
		for (int i = 0; i < 8; i++)
		{
			if (num1[same + i] != num2[same + i]) return same + i;
		}
		same += 8;
	}
#endif // OPTIMISE

	// remaining words (all of them when OPTIMISE is off)
	while ((same < len) && (num1[same] == num2[same]))
	{
		same++;
	}

	return same;

#endif // X86
}

///////////////////////////////////////////////////////////////////////////////
// compare data string (returns COMP_LE,...)

int bignum::CompStr(const buint* num1, const buint* num2, bint len)
{
	if (num1 == num2) return COMP_EQ;

#ifdef X86

	return CompStr_x64(num1, num2, len);

#else // X86

	const buint* d = &num1[len - 1];
	const buint* s = &num2[len - 1];

#ifdef OPTIMISE
	for (; len >= 8; len -= 8)
	{
		if (d[ 0] != s[ 0]) return (d[ 0] > s[ 0]) ? COMP_GR : COMP_LE;
		if (d[-1] != s[-1]) return (d[-1] > s[-1]) ? COMP_GR : COMP_LE;
		if (d[-2] != s[-2]) return (d[-2] > s[-2]) ? COMP_GR : COMP_LE;
		if (d[-3] != s[-3]) return (d[-3] > s[-3]) ? COMP_GR : COMP_LE;
		if (d[-4] != s[-4]) return (d[-4] > s[-4]) ? COMP_GR : COMP_LE;
		if (d[-5] != s[-5]) return (d[-5] > s[-5]) ? COMP_GR : COMP_LE;
		if (d[-6] != s[-6]) return (d[-6] > s[-6]) ? COMP_GR : COMP_LE;
		if (d[-7] != s[-7]) return (d[-7] > s[-7]) ? COMP_GR : COMP_LE;
		d -= 8;
		s -= 8;
	}
#endif // OPTIMISE

	for (; len > 0; len--)
	{
		if (d[0] != s[0]) return (d[0] > s[0]) ? COMP_GR : COMP_LE;
		d--;
		s--;
	}

	return COMP_EQ;

#endif // X86
}

///////////////////////////////////////////////////////////////////////////////
// bit inverse data string

// Inverts every bit of the first 'len' words of 'dst' in place.
// No-op when len <= 0.
void bignum::NotStr(buint* dst, bint len)
{
#ifdef X86

	// x64 build: delegate to NotStr_x64
	NotStr_x64(dst, len);

#else // X86

	bint rest = len; // words still to be inverted

#ifdef OPTIMISE
	// bulk part: groups of 8 words
	while (rest >= 8)
	{
		for (int i = 0; i < 8; i++) dst[i] = ~dst[i];
		dst += 8;
		rest -= 8;
	}
#endif // OPTIMISE

	// remaining words (all of them when OPTIMISE is off)
	for (; rest > 0; rest--, dst++)
	{
		*dst = ~*dst;
	}

#endif // X86
}

// Writes the bitwise inverse of the first 'len' words of 'src' to 'dst',
// lowest index first. No-op when len <= 0.
void bignum::NotStr(buint* dst, const buint* src, bint len)
{
#ifdef X86

	// x64 build: delegate to NotStr2_x64
	NotStr2_x64(dst, src, len);

#else // X86

	bint rest = len; // words still to be processed

#ifdef OPTIMISE
	// bulk part: groups of 8 words
	while (rest >= 8)
	{
		for (int i = 0; i < 8; i++) dst[i] = ~src[i];
		dst += 8;
		src += 8;
		rest -= 8;
	}
#endif // OPTIMISE

	// remaining words (all of them when OPTIMISE is off)
	while (rest > 0)
	{
		*dst++ = ~*src++;
		rest--;
	}

#endif // X86
}

///////////////////////////////////////////////////////////////////////////////
// bit AND data string

// Bitwise AND in place: dst[i] = dst[i] & src[i] for the first 'len' words,
// lowest index first. No-op when len <= 0.
void bignum::AndStr(buint* dst, const buint* src, bint len)
{
#ifdef X86

	// x64 build: delegate to AndStr_x64
	AndStr_x64(dst, src, len);

#else // X86

	bint rest = len; // words still to be processed

#ifdef OPTIMISE
	// bulk part: groups of 8 words
	while (rest >= 8)
	{
		for (int i = 0; i < 8; i++) dst[i] = dst[i] & src[i];
		dst += 8;
		src += 8;
		rest -= 8;
	}
#endif // OPTIMISE

	// remaining words (all of them when OPTIMISE is off)
	for (; rest > 0; rest--, dst++, src++)
	{
		*dst = *dst & *src;
	}

#endif // X86
}

// Bitwise AND of two sources: dst[i] = src1[i] & src2[i] for the first
// 'len' words, lowest index first. No-op when len <= 0.
void bignum::AndStr(buint* dst, const buint* src1, const buint* src2, bint len)
{
#ifdef X86

	// x64 build: delegate to AndStr2_x64
	AndStr2_x64(dst, src1, src2, len);

#else // X86

	bint rest = len; // words still to be processed

#ifdef OPTIMISE
	// bulk part: groups of 8 words
	while (rest >= 8)
	{
		for (int i = 0; i < 8; i++) dst[i] = src1[i] & src2[i];
		dst += 8;
		src1 += 8;
		src2 += 8;
		rest -= 8;
	}
#endif // OPTIMISE

	// remaining words (all of them when OPTIMISE is off)
	while (rest > 0)
	{
		*dst++ = *src1++ & *src2++;
		rest--;
	}

#endif // X86
}

///////////////////////////////////////////////////////////////////////////////
// bit OR data string

// Bitwise OR in place: dst[i] = dst[i] | src[i] for the first 'len' words,
// lowest index first. No-op when len <= 0.
void bignum::OrStr(buint* dst, const buint* src, bint len)
{
#ifdef X86

	// x64 build: delegate to OrStr_x64
	OrStr_x64(dst, src, len);

#else // X86

	bint rest = len; // words still to be processed

#ifdef OPTIMISE
	// bulk part: groups of 8 words
	while (rest >= 8)
	{
		for (int i = 0; i < 8; i++) dst[i] = dst[i] | src[i];
		dst += 8;
		src += 8;
		rest -= 8;
	}
#endif // OPTIMISE

	// remaining words (all of them when OPTIMISE is off)
	for (; rest > 0; rest--, dst++, src++)
	{
		*dst = *dst | *src;
	}

#endif // X86
}

// Bitwise OR of two sources: dst[i] = src1[i] | src2[i] for the first
// 'len' words, lowest index first. No-op when len <= 0.
void bignum::OrStr(buint* dst, const buint* src1, const buint* src2, bint len)
{
#ifdef X86

	// x64 build: delegate to OrStr2_x64
	OrStr2_x64(dst, src1, src2, len);

#else // X86

	bint rest = len; // words still to be processed

#ifdef OPTIMISE
	// bulk part: groups of 8 words
	while (rest >= 8)
	{
		for (int i = 0; i < 8; i++) dst[i] = src1[i] | src2[i];
		dst += 8;
		src1 += 8;
		src2 += 8;
		rest -= 8;
	}
#endif // OPTIMISE

	// remaining words (all of them when OPTIMISE is off)
	while (rest > 0)
	{
		*dst++ = *src1++ | *src2++;
		rest--;
	}

#endif // X86
}

///////////////////////////////////////////////////////////////////////////////
// bit XOR data string

// Bitwise XOR in place: dst[i] = dst[i] ^ src[i] for the first 'len' words,
// lowest index first. No-op when len <= 0.
void bignum::XorStr(buint* dst, const buint* src, bint len)
{
#ifdef X86

	// x64 build: delegate to XorStr_x64
	XorStr_x64(dst, src, len);

#else // X86

	bint rest = len; // words still to be processed

#ifdef OPTIMISE
	// bulk part: groups of 8 words
	while (rest >= 8)
	{
		for (int i = 0; i < 8; i++) dst[i] = dst[i] ^ src[i];
		dst += 8;
		src += 8;
		rest -= 8;
	}
#endif // OPTIMISE

	// remaining words (all of them when OPTIMISE is off)
	for (; rest > 0; rest--, dst++, src++)
	{
		*dst = *dst ^ *src;
	}

#endif // X86
}

// Bitwise XOR of two sources: dst[i] = src1[i] ^ src2[i] for the first
// 'len' words, lowest index first. No-op when len <= 0.
void bignum::XorStr(buint* dst, const buint* src1, const buint* src2, bint len)
{
#ifdef X86

	// x64 build: delegate to XorStr2_x64
	XorStr2_x64(dst, src1, src2, len);

#else // X86

	bint rest = len; // words still to be processed

#ifdef OPTIMISE
	// bulk part: groups of 8 words
	while (rest >= 8)
	{
		for (int i = 0; i < 8; i++) dst[i] = src1[i] ^ src2[i];
		dst += 8;
		src1 += 8;
		src2 += 8;
		rest -= 8;
	}
#endif // OPTIMISE

	// remaining words (all of them when OPTIMISE is off)
	while (rest > 0)
	{
		*dst++ = *src1++ ^ *src2++;
		rest--;
	}

#endif // X86
}

///////////////////////////////////////////////////////////////////////////////
// negate data string (returns carry flag - set if number is not 0)

// Negates (two's complement) the 'len'-word number at 'dst' in place.
// Returns 1 when the number was non-zero, 0 when it was zero or len <= 0
// (in which case 'dst' is left untouched).
buint bignum::NegStr(buint* dst, bint len)
{
	// empty string: nothing to negate (also keeps a negative length from
	// reaching the word accesses below)
	if (len <= 0) return 0;

	// low zero words stay zero under negation - skip them
	bint len0 = bignum::ScanEquStr(dst, 0, len);
	dst += len0;
	len -= len0;

	// number is 0, no carry
	if (len == 0) return 0;

	// negate the lowest non-zero word; done in unsigned arithmetic because
	// -(bint)x is a signed overflow (undefined behaviour) when only the
	// top bit of x is set
	dst[0] = (buint)(0 - dst[0]);

	// every higher word is just bit-inverted (the +1 of the two's
	// complement has been absorbed by the word negated above)
	bignum::NotStr(dst + 1, len - 1);

	// return carry
	return 1;
}

// Writes the two's complement negation of the 'len'-word number 'src' to
// 'dst'. Returns 1 when the number was non-zero, 0 when it was zero or
// len <= 0 (with len <= 0 nothing is written).
buint bignum::NegStr(buint* dst, const buint* src, bint len)
{
	// empty string: nothing to negate (also keeps a negative length from
	// reaching the word accesses below)
	if (len <= 0) return 0;

	// low zero words stay zero under negation - count them
	bint len0 = bignum::ScanEquStr(src, 0, len);
	src += len0;
	len -= len0;

	// write the zero part of the result
	bignum::FillStr(dst, 0, len0);
	dst += len0;

	// number is 0, no carry
	if (len == 0) return 0;

	// negate the lowest non-zero word; done in unsigned arithmetic because
	// -(bint)x is a signed overflow (undefined behaviour) when only the
	// top bit of x is set
	dst[0] = (buint)(0 - src[0]);

	// every higher word is just bit-inverted (the +1 of the two's
	// complement has been absorbed by the word negated above)
	bignum::NotStr(dst + 1, src + 1, len - 1);

	// return carry
	return 1;
}

///////////////////////////////////////////////////////////////////////////////
// increment data string (returns carry flag)

// Adds 1 to the 'len'-word number at 'dst' in place, rippling from the
// lowest word upward. Returns 0 as soon as a word does not wrap to zero,
// 1 when every word wrapped (also when len <= 0).
buint bignum::IncStr(buint* dst, bint len)
{
#ifdef X86

	return IncStr_x64(dst, len);

#else // X86

	bint rest = len; // words the carry may still ripple into

#ifdef OPTIMISE
	// bulk part: up to 8 words per pass
	while (rest >= 8)
	{
		for (int i = 0; i < 8; i++)
		{
			// a non-zero result means the carry has been absorbed
			if (++dst[i] != 0) return 0;
		}
		dst += 8;
		rest -= 8;
	}
#endif // OPTIMISE

	// remaining words (all of them when OPTIMISE is off)
	for (; rest > 0; rest--, dst++)
	{
		if (++(*dst) != 0) return 0;
	}

	// carry ran off the top
	return 1;

#endif // X86
}

// Computes dst = src + 1 over 'len' words, rippling from the lowest word
// upward. Once a word does not wrap to zero the rest of 'src' is copied
// with CopyStr and 0 is returned; 1 is returned when every word wrapped
// (also when len <= 0).
buint bignum::IncStr(buint* dst, const buint* src, bint len)
{
#ifdef X86

	return IncStr2_x64(dst, src, len);

#else // X86

	bint rest = len; // words not yet produced

#ifdef OPTIMISE
	// bulk part: up to 8 words per pass
	while (rest >= 8)
	{
		for (int i = 0; i < 8; i++)
		{
			const buint sum = src[i] + 1;
			dst[i] = sum;
			if (sum != 0)
			{
				// carry absorbed: the higher words are a plain copy
				bignum::CopyStr(dst + i + 1, src + i + 1, rest - i - 1);
				return 0;
			}
		}
		dst += 8;
		src += 8;
		rest -= 8;
	}
#endif // OPTIMISE

	// remaining words (all of them when OPTIMISE is off)
	for (; rest > 0; rest--, dst++, src++)
	{
		const buint sum = *src + 1;
		*dst = sum;
		if (sum != 0)
		{
			bignum::CopyStr(dst + 1, src + 1, rest - 1);
			return 0;
		}
	}

	// carry ran off the top
	return 1;

#endif // X86
}

///////////////////////////////////////////////////////////////////////////////
// decrement data string (returns carry flag)

// Subtracts 1 from the 'len'-word number at 'dst' in place, rippling from
// the lowest word upward. Returns 0 as soon as a word does not wrap to
// BIGMAX, 1 when every word wrapped (also when len <= 0).
buint bignum::DecStr(buint* dst, bint len)
{
#ifdef X86

	return DecStr_x64(dst, len);

#else // X86

	bint rest = len; // words the borrow may still ripple into

#ifdef OPTIMISE
	// bulk part: up to 8 words per pass
	while (rest >= 8)
	{
		for (int i = 0; i < 8; i++)
		{
			// anything but BIGMAX means the borrow has been absorbed
			if (--dst[i] != BIGMAX) return 0;
		}
		dst += 8;
		rest -= 8;
	}
#endif // OPTIMISE

	// remaining words (all of them when OPTIMISE is off)
	for (; rest > 0; rest--, dst++)
	{
		if (--(*dst) != BIGMAX) return 0;
	}

	// borrow ran off the top
	return 1;

#endif // X86
}

// Computes dst = src - 1 over 'len' words, rippling from the lowest word
// upward. Once a word does not wrap to BIGMAX the rest of 'src' is copied
// with CopyStr and 0 is returned; 1 is returned when every word wrapped
// (also when len <= 0).
buint bignum::DecStr(buint* dst, const buint* src, bint len)
{
#ifdef X86

	return DecStr2_x64(dst, src, len);

#else // X86

	bint rest = len; // words not yet produced

#ifdef OPTIMISE
	// bulk part: up to 8 words per pass
	while (rest >= 8)
	{
		for (int i = 0; i < 8; i++)
		{
			const buint diff = src[i] - 1;
			dst[i] = diff;
			if (diff != BIGMAX)
			{
				// borrow absorbed: the higher words are a plain copy
				bignum::CopyStr(dst + i + 1, src + i + 1, rest - i - 1);
				return 0;
			}
		}
		dst += 8;
		src += 8;
		rest -= 8;
	}
#endif // OPTIMISE

	// remaining words (all of them when OPTIMISE is off)
	for (; rest > 0; rest--, dst++, src++)
	{
		const buint diff = *src - 1;
		*dst = diff;
		if (diff != BIGMAX)
		{
			bignum::CopyStr(dst + 1, src + 1, rest - 1);
			return 0;
		}
	}

	// borrow ran off the top
	return 1;

#endif // X86
}

///////////////////////////////////////////////////////////////////////////////
// add data string (outputs carry)

// Computes dst = dst + src + cy over 'len' words, lowest word first.
// Returns the carry out of the highest word ('cy' itself when len <= 0).
buint bignum::AddStr(buint cy, buint* dst, const buint* src, bint len)
{
#ifdef X86

	return AddStr_x64(cy, dst, src, len);

#else // X86

	buint carry = cy; // carry travelling between words
	bint rest = len;  // words still to be added

#ifdef OPTIMISE
	// bulk part: groups of 8 words
	while (rest >= 8)
	{
		for (int i = 0; i < 8; i++)
		{
			// first add the carry, then the source word; a result smaller
			// than the value just added means that step wrapped around
			buint sum = dst[i] + carry;
			carry = (sum < carry) ? 1 : 0;
			const buint add = src[i];
			sum += add;
			if (sum < add) carry++;
			dst[i] = sum;
		}
		dst += 8;
		src += 8;
		rest -= 8;
	}
#endif // OPTIMISE

	// remaining words (all of them when OPTIMISE is off)
	for (; rest > 0; rest--, dst++, src++)
	{
		buint sum = *dst + carry;
		carry = (sum < carry) ? 1 : 0;
		const buint add = *src;
		sum += add;
		if (sum < add) carry++;
		*dst = sum;
	}

	return carry;

#endif // X86
}

// Computes dst = src1 + src2 + cy over 'len' words, lowest word first.
// Returns the carry out of the highest word ('cy' itself when len <= 0).
buint bignum::AddStr(buint cy, buint* dst, const buint* src1, const buint* src2, bint len)
{
#ifdef X86

	return AddStr2_x64(cy, dst, src1, src2, len);

#else // X86

	buint carry = cy; // carry travelling between words
	bint rest = len;  // words still to be added

#ifdef OPTIMISE
	// bulk part: groups of 8 words
	while (rest >= 8)
	{
		for (int i = 0; i < 8; i++)
		{
			// first add the carry, then the second operand; a result smaller
			// than the value just added means that step wrapped around
			buint sum = src1[i] + carry;
			carry = (sum < carry) ? 1 : 0;
			const buint add = src2[i];
			sum += add;
			if (sum < add) carry++;
			dst[i] = sum;
		}
		dst += 8;
		src1 += 8;
		src2 += 8;
		rest -= 8;
	}
#endif // OPTIMISE

	// remaining words (all of them when OPTIMISE is off)
	for (; rest > 0; rest--, dst++, src1++, src2++)
	{
		buint sum = *src1 + carry;
		carry = (sum < carry) ? 1 : 0;
		const buint add = *src2;
		sum += add;
		if (sum < add) carry++;
		*dst = sum;
	}

	return carry;

#endif // X86
}

///////////////////////////////////////////////////////////////////////////////
// sub data string (outputs carry)

// dst = dst - src
// Computes dst = dst - src - cy over 'len' words, lowest word first.
// Returns the borrow out of the highest word ('cy' itself when len <= 0).
buint bignum::SubStr(buint cy, buint* dst, const buint* src, bint len)
{
#ifdef X86

	return SubStr_x64(cy, dst, src, len);

#else // X86

	buint borrow = cy; // borrow travelling between words
	bint rest = len;   // words still to be subtracted

#ifdef OPTIMISE
	// bulk part: groups of 8 words
	while (rest >= 8)
	{
		for (int i = 0; i < 8; i++)
		{
			// fold the borrow into the subtrahend first (that may wrap),
			// then borrow again if the minuend is too small
			const buint sub = src[i] + borrow;
			borrow = (sub < borrow) ? 1 : 0;
			const buint from = dst[i];
			if (from < sub) borrow++;
			dst[i] = from - sub;
		}
		dst += 8;
		src += 8;
		rest -= 8;
	}
#endif // OPTIMISE

	// remaining words (all of them when OPTIMISE is off)
	for (; rest > 0; rest--, dst++, src++)
	{
		const buint sub = *src + borrow;
		borrow = (sub < borrow) ? 1 : 0;
		const buint from = *dst;
		if (from < sub) borrow++;
		*dst = from - sub;
	}

	return borrow;

#endif // X86
}

// dst = src1 - src2
// Computes dst = src1 - src2 - cy over 'len' words, lowest word first.
// Returns the borrow out of the highest word ('cy' itself when len <= 0).
buint bignum::SubStr(buint cy, buint* dst, const buint* src1, const buint* src2, bint len)
{
#ifdef X86

	return SubStr2_x64(cy, dst, src1, src2, len);

#else // X86

	buint borrow = cy; // borrow travelling between words
	bint rest = len;   // words still to be subtracted

#ifdef OPTIMISE
	// bulk part: groups of 8 words
	while (rest >= 8)
	{
		for (int i = 0; i < 8; i++)
		{
			// fold the borrow into the subtrahend first (that may wrap),
			// then borrow again if the minuend is too small
			const buint sub = src2[i] + borrow;
			borrow = (sub < borrow) ? 1 : 0;
			const buint from = src1[i];
			if (from < sub) borrow++;
			dst[i] = from - sub;
		}
		dst += 8;
		src1 += 8;
		src2 += 8;
		rest -= 8;
	}
#endif // OPTIMISE

	// remaining words (all of them when OPTIMISE is off)
	for (; rest > 0; rest--, dst++, src1++, src2++)
	{
		const buint sub = *src2 + borrow;
		borrow = (sub < borrow) ? 1 : 0;
		const buint from = *src1;
		if (from < sub) borrow++;
		*dst = from - sub;
	}

	return borrow;

#endif // X86
}

// dst = src - dst
// Reversed subtraction: computes dst = src - dst - cy over 'len' words,
// lowest word first. Returns the borrow out of the highest word ('cy'
// itself when len <= 0).
buint bignum::InvSubStr(buint cy, buint* dst, const buint* src, bint len)
{
#ifdef X86

	return InvSubStr_x64(cy, dst, src, len);

#else // X86

	buint borrow = cy; // borrow travelling between words
	bint rest = len;   // words still to be subtracted

#ifdef OPTIMISE
	// bulk part: groups of 8 words
	while (rest >= 8)
	{
		for (int i = 0; i < 8; i++)
		{
			// here the old destination word is the subtrahend: fold the
			// borrow into it first, then borrow again if 'src' is too small
			const buint sub = dst[i] + borrow;
			borrow = (sub < borrow) ? 1 : 0;
			const buint from = src[i];
			if (from < sub) borrow++;
			dst[i] = from - sub;
		}
		dst += 8;
		src += 8;
		rest -= 8;
	}
#endif // OPTIMISE

	// remaining words (all of them when OPTIMISE is off)
	for (; rest > 0; rest--, dst++, src++)
	{
		const buint sub = *dst + borrow;
		borrow = (sub < borrow) ? 1 : 0;
		const buint from = *src;
		if (from < sub) borrow++;
		*dst = from - sub;
	}

	return borrow;

#endif // X86
}

///////////////////////////////////////////////////////////////////////////////
// negate data string with decrement, dst = -1 - src - cy (outputs carry)

// Computes dst = -1 - src - cy over 'len' words, lowest word first.
//   cy == 0 ... the result is simply the bit inverse of 'src'; returns 0
//   cy != 0 ... the result is (~src) - 1; returns 1 only when the borrow
//               ripples through every word (all words of 'src' are BIGMAX,
//               or len <= 0), otherwise 0
buint bignum::NegDecStr(buint cy, buint* dst, const buint* src, bint len)
{
	// no extra decrement: -1 - src is exactly ~src
	if (cy == 0)
	{
		bignum::NotStr(dst, src, len);
		return 0;
	}

	// (ASM would be not more effective)
	bint rest = len; // words not yet produced

#ifdef OPTIMISE
	// bulk part: up to 8 words per pass
	for (; rest >= 8; rest -= 8)
	{
		for (int i = 0; i < 8; i++)
		{
			const buint r = src[i];
			if (r != BIGMAX)
			{
				// ~r is non-zero here, so the decrement is absorbed in this
				// word and the higher words are plain inverses. Unsigned
				// arithmetic is used because the former -(bint)r-2 is a
				// signed overflow (undefined behaviour) for some values of r.
				dst[i] = (buint)(~r - 1);
				bignum::NotStr(dst + i + 1, src + i + 1, rest - i - 1);
				return 0;
			}

			// ~r is 0: 0 - 1 wraps to all ones and the borrow moves on
			dst[i] = -1;
		}
		dst += 8;
		src += 8;
	}
#endif // OPTIMISE

	// remaining words (all of them when OPTIMISE is off)
	for (; rest > 0; rest--)
	{
		const buint r = src[0];
		if (r != BIGMAX)
		{
			dst[0] = (buint)(~r - 1);
			bignum::NotStr(dst + 1, src + 1, rest - 1);
			return 0;
		}
		dst[0] = -1;
		dst++;
		src++;
	}

	// borrow ran off the top
	return 1;
}

// In-place variant: computes dst = -1 - dst - cy over 'len' words, lowest
// word first.
//   cy == 0 ... the result is simply the bit inverse of 'dst'; returns 0
//   cy != 0 ... the result is (~dst) - 1; returns 1 only when the borrow
//               ripples through every word (all words are BIGMAX, or
//               len <= 0), otherwise 0
buint bignum::NegDecStr(buint cy, buint* dst, bint len)
{
	// no extra decrement: -1 - dst is exactly ~dst
	if (cy == 0)
	{
		bignum::NotStr(dst, len);
		return 0;
	}

	// (ASM would be not more effective)
	bint rest = len; // words not yet processed

#ifdef OPTIMISE
	// bulk part: up to 8 words per pass
	for (; rest >= 8; rest -= 8)
	{
		for (int i = 0; i < 8; i++)
		{
			const buint r = dst[i];
			if (r != BIGMAX)
			{
				// ~r is non-zero here, so the decrement is absorbed in this
				// word and the higher words are plain inverses. Unsigned
				// arithmetic is used because the former -(bint)r-2 is a
				// signed overflow (undefined behaviour) for some values of r.
				dst[i] = (buint)(~r - 1);
				bignum::NotStr(dst + i + 1, rest - i - 1);
				return 0;
			}

			// word is BIGMAX: the result (~r - 1 wrapping to all ones) equals
			// the value already stored, so nothing needs to be written
		}
		dst += 8;
	}
#endif // OPTIMISE

	// remaining words (all of them when OPTIMISE is off)
	for (; rest > 0; rest--)
	{
		const buint r = dst[0];
		if (r != BIGMAX)
		{
			dst[0] = (buint)(~r - 1);
			bignum::NotStr(dst + 1, rest - 1);
			return 0;
		}
		dst++;
	}

	// borrow ran off the top
	return 1;
}

///////////////////////////////////////////////////////////////////////////////
// multiply data string by one word (inputs and outputs carry bits)

// Computes dst = src * num + carry over 'len' words, lowest word first.
//   carry ... value added into the lowest word
//   returns . the word carried out of the highest position ('carry' itself
//             when len <= 0)
// The shortcuts for num == 0 and num == 1 now honour the incoming carry
// (they used to drop it and return 0), so that they give the same result
// as the general multiplication loops below.
buint bignum::MulStr(buint carry, buint num, buint* dst, const buint* src, bint len)
{
#ifdef X86

	return MulStr_x64(carry, num, dst, src, len);

#else // X86

	// split the multiplier into its low and high half
	buintH numL = (buintH)num;
	buintH numH = (buintH)(num >> BIGBITS12);
	buint rL, rH;

	// num is max. 1/2 size
	if (numH == 0)
	{
		// num == 0: the product is 0, only the carry lands in the lowest word
		if (numL == 0)
		{
			for (; len > 0; len--)
			{
				dst[0] = carry;
				carry = 0;
				dst++;
			}
			return carry;
		}

		// num == 1: the product is 'src' itself, the carry just ripples upward
		if (numL == 1)
		{
			for (; len > 0; len--)
			{
				rL = src[0] + carry;
				carry = (rL < carry) ? 1 : 0;
				dst[0] = rL;
				src++;
				dst++;
			}
			return carry;
		}

		// word x half-word multiplication
		for (; len > 0; len--)
		{
			MulWL(&rL, &rH, src[0], numL); // rH:rL = max. 0000FFFE:FFFF0001
			rL += carry;
			carry = (rL < carry) ? 1 : 0;
			carry += rH; // carry max. 0000FFFF
			dst[0] = rL;
			src++;
			dst++;
		}
	}
	else
	{
		// num is full size: word x word multiplication
		for (; len > 0; len--)
		{
			MulWW(&rL, &rH, src[0], numL, numH); // rH:rL = max. FFFFFFFE:00000001
			rL += carry;
			carry = (rL < carry) ? 1 : 0;
			carry += rH; // carry max. FFFFFFFF
			dst[0] = rL;
			src++;
			dst++;
		}
	}

	return carry;

#endif // X86
}

///////////////////////////////////////////////////////////////////////////////
// multiply data string by one word and add to destination (inputs and outputs carry bits)

// Computes dst = dst + src * num + carry over 'len' words, lowest word first.
//   carry ... value added into the lowest word
//   returns . the word carried out of the highest position ('carry' itself
//             when len <= 0)
// The shortcut for num == 0 now honours the incoming carry (it used to drop
// it and return 0), matching the num == 1 and general branches below.
buint bignum::MulAddStr(buint carry, buint num, buint* dst, const buint* src, bint len)
{
#ifdef X86

	return MulAddStr_x64(carry, num, dst, src, len);

#else // X86

	// split the multiplier into its low and high half
	buintH numL = (buintH)num;
	buintH numH = (buintH)(num >> BIGBITS12);
	buint r, rL, rH;

	// num is max. 1/2 size
	if (numH == 0)
	{
		// num == 0: src contributes nothing, only the carry has to be added;
		// stop as soon as it has been absorbed (immediately when carry == 0)
		if (numL == 0)
		{
			for (; (len > 0) && (carry != 0); len--)
			{
				r = dst[0] + carry;
				carry = (r < carry) ? 1 : 0;
				dst[0] = r;
				dst++;
			}
			return carry;
		}

		if (numL == 1)
		{
			// num == 1: plain addition dst += src + carry
			for (; len > 0; len--)
			{
				rL = src[0];
				rL += carry;
				carry = (rL < carry) ? 1 : 0;
				r = dst[0];
				rL += r;
				carry += (rL < r) ? 1 : 0;
				dst[0] = rL;
				src++;
				dst++;
			}
		}
		else
		{
			// word x half-word multiplication
			for (; len > 0; len--)
			{
				MulWL(&rL, &rH, src[0], numL); // rH:rL = max. 0000FFFE:FFFF0001
				rL += carry;
				carry = (rL < carry) ? 1 : 0;
				carry += rH; // carry max. 0000FFFF
				r = dst[0];
				rL += r;
				carry += (rL < r) ? 1 : 0;
				dst[0] = rL;
				src++;
				dst++;
			}
		}
	}
	else
	{
		// num is full size: word x word multiplication
		for (; len > 0; len--)
		{
			MulWW(&rL, &rH, src[0], numL, numH); // rH:rL = max. FFFFFFFE:00000001
			rL += carry;
			carry = (rL < carry) ? 1 : 0;
			carry += rH; // carry max. FFFFFFFF
			r = dst[0];
			rL += r;
			carry += (rL < r) ? 1 : 0; // if rH == FFFFFFFE, no carry here, cannot overflow
			dst[0] = rL;
			src++;
			dst++;
		}
	}

	return carry;

#endif // X86
}

///////////////////////////////////////////////////////////////////////////////
// square data string (len is size of source, destination is sized 2*len)

// Squares the 'len'-word number 'src' into 'dst', which must provide room
// for 2*len words. With len <= 0 nothing is read or written.
void bignum::SqrStr(buint* dst, const buint* src, bint len)
{
	// empty source: the destination has no words either. Without this guard
	// the non-OPTIMISE path below would read src[0] and write dst[0].
	if (len <= 0) return;

#ifdef OPTIMISE

	const buint* s = src;
	buint* d = dst;
	bint n = len;

	// clear result
	bignum::FillStr(dst, 0, len*2);

	// accumulate the mixed products src[i]*src[j] for i < j, each only once:
	// in pass i the words src[i+1..] are multiplied by src[i] and added in
	// starting at dst[2*i+1]; the outgoing carry goes into the next word up
	for (; n > 0; n--)
	{
		d++;
		d[n-1] = bignum::MulAddStr(0, *s, d, s+1, n-1);
		d++;
		s++;
	}

	// double result (every mixed product occurs twice in the square)
	bignum::AddStr(0, dst, dst, 2*len);

	// add square of entries: src[i]*src[i] belongs at dst[2*i], dst[2*i+1]
#ifdef X86

	AddSqrStr_x64(dst, src, len);

#else // X86

	buint r, rL, rH;
	buint carry = 0;
	for (; len > 0; len--)
	{
		SqrWW(&rL, &rH, src[0]); // rH:rL = max. FFFFFFFE:00000001

		// low word of the square plus the carry from the previous pair
		rL += carry;
		carry = (rL < carry) ? 1 : 0;
		carry += rH; // carry max. FFFFFFFF

		r = dst[0];
		rL += r;
		carry += (rL < r) ? 1 : 0; // if rH == FFFFFFFE, no carry here, cannot overflow
		dst[0] = rL;
		dst++;

		// high word: add the accumulated carry, keep the overflow for the next pair
		r = dst[0];
		carry += r;
		dst[0] = carry;
		carry = (carry < r) ? 1 : 0;
		dst++;

		src++;
	}

#endif // X86

#else // OPTIMISE

	// plain schoolbook multiplication of 'src' by itself, one multiplier
	// word per pass; the carry of each pass becomes the next higher word
	const buint* s1 = src;
	const buint* s2 = src;
	buint* d = dst;
	bint n = len;
	d[len] = bignum::MulStr(0, *s1, d, s2, len);
	for (n--; n > 0; n--)
	{
		s1++;
		d++;
		d[len] = bignum::MulAddStr(0, *s1, d, s2, len);
	}

#endif // OPTIMISE
}

///////////////////////////////////////////////////////////////////////////////
// divide data string by an integer (returns remainder = carry)

buint bignum::DivStr(buint carry, buint num, buint* dst, const buint* src, bint len)
{
#ifdef X86

	return DivStr_x64(carry, num, dst, src, len);

#else // X86

	dst += len - 1;
	src += len - 1;

	buint r, a, b, c, d, e;

	// num is max. 1/2 size
	if (num <= BIGMASK12)
	{
		buintH numH = (buintH)num;

		for (; len > 0; len--)
		{
			r = src[0];

			a = (r >> BIGBITS12) + (carry << BIGBITS12);
			b = a/numH;
			carry = a - b*numH;

			a = (r & BIGMASK12) + (carry << BIGBITS12);
			c = a/numH;
			carry = a - c*numH;

			dst[0] = (b << BIGBITS12) + c;

			src--;
			dst--;
		}
	}

	// num is max 3/4 size
	else if (num <= BIGMASK34)
	{
		for (; len > 0; len--)
		{
			r = src[0];

			a = (r >> BIGBITS34) + (carry << BIGBITS14);
			b = a/num;
			carry = a - b*num;

			a = ((r >> BIGBITS12) & BIGMASK14) + (carry << BIGBITS14);
			c = a/num;
			carry = a - c*num;

			a = ((r >> BIGBITS14) & BIGMASK14) + (carry << BIGBITS14);
			d = a/num;
			carry = a - d*num;

			a = (r & BIGMASK14) + (carry << BIGBITS14);
			e = a/num;
			carry = a - e*num;

			dst[0] = (b << BIGBITS34) + (c << BIGBITS12) + (d << BIGBITS14) + e;

			src--;
			dst--;
		}
	}

	// num is full number of bits
	else if (num >= BIGLASTBIT)
	{
		int n;
		buint acc, mask;

		for (; len > 0; len--)
		{
			r = src[0];
			acc = 0;
			mask = BIGLASTBIT;

#ifdef OPTIMISE
			for (n = BIGBITS/8; n > 0; n--)
			{
				c=carry; carry=(carry<<1)|(r>>(BIGBITS-1)); if(((c&BIGLASTBIT)!=0)||(num<=carry)){acc|=mask;carry-=num;} r<<=1; mask>>=1;
				c=carry; carry=(carry<<1)|(r>>(BIGBITS-1)); if(((c&BIGLASTBIT)!=0)||(num<=carry)){acc|=mask;carry-=num;} r<<=1; mask>>=1;
				c=carry; carry=(carry<<1)|(r>>(BIGBITS-1)); if(((c&BIGLASTBIT)!=0)||(num<=carry)){acc|=mask;carry-=num;} r<<=1; mask>>=1;
				c=carry; carry=(carry<<1)|(r>>(BIGBITS-1)); if(((c&BIGLASTBIT)!=0)||(num<=carry)){acc|=mask;carry-=num;} r<<=1; mask>>=1;
				c=carry; carry=(carry<<1)|(r>>(BIGBITS-1)); if(((c&BIGLASTBIT)!=0)||(num<=carry)){acc|=mask;carry-=num;} r<<=1; mask>>=1;
				c=carry; carry=(carry<<1)|(r>>(BIGBITS-1)); if(((c&BIGLASTBIT)!=0)||(num<=carry)){acc|=mask;carry-=num;} r<<=1; mask>>=1;
				c=carry; carry=(carry<<1)|(r>>(BIGBITS-1)); if(((c&BIGLASTBIT)!=0)||(num<=carry)){acc|=mask;carry-=num;} r<<=1; mask>>=1;
				c=carry; carry=(carry<<1)|(r>>(BIGBITS-1)); if(((c&BIGLASTBIT)!=0)||(num<=carry)){acc|=mask;carry-=num;} r<<=1; mask>>=1;
			}
#else // OPTIMISE
			for (n = BIGBITS; n > 0; n--)
			{
				c = carry;
				carry = (carry << 1) | (r >> (BIGBITS - 1));
				if (((c & BIGLASTBIT) != 0) || (num <= carry))
				{
					acc |= mask;
					carry -= num;
				}
				r <<= 1;
				mask >>= 1;
			}
#endif // OPTIMISE

			dst[0] = acc;

			src--;
			dst--;
		}
	}

	// num is above 3/4, but not full number of bits
	else
	{
		int n;
		buint acc, mask;

		for (; len > 0; len--)
		{
			r = src[0];
			acc = 0;
			mask = BIGLASTBIT;

#ifdef OPTIMISE
			for (n = BIGBITS/8; n > 0; n--)
			{
				carry=(carry<<1)|(r>>(BIGBITS-1)); if(num<=carry){acc|=mask;carry-=num;} r<<=1; mask>>=1;
				carry=(carry<<1)|(r>>(BIGBITS-1)); if(num<=carry){acc|=mask;carry-=num;} r<<=1; mask>>=1;
				carry=(carry<<1)|(r>>(BIGBITS-1)); if(num<=carry){acc|=mask;carry-=num;} r<<=1; mask>>=1;
				carry=(carry<<1)|(r>>(BIGBITS-1)); if(num<=carry){acc|=mask;carry-=num;} r<<=1; mask>>=1;
				carry=(carry<<1)|(r>>(BIGBITS-1)); if(num<=carry){acc|=mask;carry-=num;} r<<=1; mask>>=1;
				carry=(carry<<1)|(r>>(BIGBITS-1)); if(num<=carry){acc|=mask;carry-=num;} r<<=1; mask>>=1;
				carry=(carry<<1)|(r>>(BIGBITS-1)); if(num<=carry){acc|=mask;carry-=num;} r<<=1; mask>>=1;
				carry=(carry<<1)|(r>>(BIGBITS-1)); if(num<=carry){acc|=mask;carry-=num;} r<<=1; mask>>=1;
			}
#else // OPTIMISE
			for (n = BIGBITS; n > 0; n--)
			{
				carry = (carry << 1) | (r >> (BIGBITS - 1));
				if (num <= carry)
				{
					acc |= mask;
					carry -= num;
				}
				r <<= 1;
				mask >>= 1;
			}
#endif // OPTIMISE

			dst[0] = acc;

			src--;
			dst--;
		}
	}

	return carry;

#endif // X86
}

///////////////////////////////////////////////////////////////////////////////
// expand number division by an integer (returns length of expansion)
//   carry = remainder to be expanded
//           (presumably carry < num is expected - verify against callers)
//   num = divisor
//   dst = pointer behind the destination; it is decremented before every
//         store, so the words are written to dst[-1], dst[-2], ...
//   maxlen = max. number of words to generate
// Expansion stops when the remainder becomes 0 or when maxlen words have
// been stored. Every step divides the remainder extended by one word of
// zeroes, i.e. no source words are read.

bint bignum::DivUExpStr(buint carry, buint num, buint* dst, bint maxlen)
{
#ifdef X86

	return DivUExpStr_x64(carry, num, dst, maxlen);

#else // X86

	buint a, b, c, d, e;
	bint len = 0;

	// num is max. 1/2 size
	if (num <= BIGMASK12)
	{
		while ((carry != 0) && (len < maxlen))
		{
			// higher half of the word
			a = carry << BIGBITS12;
			b = a/num;
			carry = a - b*num;

			// lower half of the word
			a = carry << BIGBITS12;
			c = a/num;
			carry = a - c*num;

			len++;
			dst--;
			*dst = (b << BIGBITS12) + c;
		}
	}

	// num is max 3/4 size
	else if (num <= BIGMASK34)
	{
		while ((carry != 0) && (len < maxlen))
		{
			// 4 quarters of the word, from the highest one
			a = carry << BIGBITS14;
			b = a/num;
			carry = a - b*num;

			a = carry << BIGBITS14;
			c = a/num;
			carry = a - c*num;

			a = carry << BIGBITS14;
			d = a/num;
			carry = a - d*num;

			a = carry << BIGBITS14;
			e = a/num;
			carry = a - e*num;

			len++;
			dst--;
			*dst = (b << BIGBITS34) + (c << BIGBITS12) + (d << BIGBITS14) + e;
		}
	}

	// num is full number of bits
	else if (num >= BIGLASTBIT)
	{
		int n;
		buint acc, mask;

		while ((carry != 0) && (len < maxlen))
		{
			acc = 0;
			mask = BIGLASTBIT;

			// Bit by bit: quotient bit is 1 if the shifted remainder is >= num.
			// The bit shifted out of the remainder (saved in c) means that the
			// shifted remainder does not fit into the word, so it is >= num too.
#ifdef OPTIMISE
			for (n = BIGBITS/8; n > 0; n--)
			{
				c=carry; carry=(carry<<1); if(((c&BIGLASTBIT)!=0)||(num<=carry)){acc|=mask;carry-=num;} mask>>=1;
				c=carry; carry=(carry<<1); if(((c&BIGLASTBIT)!=0)||(num<=carry)){acc|=mask;carry-=num;} mask>>=1;
				c=carry; carry=(carry<<1); if(((c&BIGLASTBIT)!=0)||(num<=carry)){acc|=mask;carry-=num;} mask>>=1;
				c=carry; carry=(carry<<1); if(((c&BIGLASTBIT)!=0)||(num<=carry)){acc|=mask;carry-=num;} mask>>=1;
				c=carry; carry=(carry<<1); if(((c&BIGLASTBIT)!=0)||(num<=carry)){acc|=mask;carry-=num;} mask>>=1;
				c=carry; carry=(carry<<1); if(((c&BIGLASTBIT)!=0)||(num<=carry)){acc|=mask;carry-=num;} mask>>=1;
				c=carry; carry=(carry<<1); if(((c&BIGLASTBIT)!=0)||(num<=carry)){acc|=mask;carry-=num;} mask>>=1;
				c=carry; carry=(carry<<1); if(((c&BIGLASTBIT)!=0)||(num<=carry)){acc|=mask;carry-=num;} mask>>=1;
			}
#else // OPTIMISE
			for (n = BIGBITS; n > 0; n--)
			{
				c=carry; carry=(carry<<1); if(((c&BIGLASTBIT)!=0)||(num<=carry)){acc|=mask;carry-=num;} mask>>=1;
			}
#endif // OPTIMISE

			len++;
			dst--;
			*dst = acc;
		}
	}

	// num is above 3/4, but not full number of bits
	else
	{
		int n;
		buint acc, mask;

		while ((carry != 0) && (len < maxlen))
		{
			acc = 0;
			mask = BIGLASTBIT;

			// Bit by bit: quotient bit is 1 if the shifted remainder is >= num
#ifdef OPTIMISE
			for (n = BIGBITS/8; n > 0; n--)
			{
				carry=(carry<<1); if(num<=carry){acc|=mask;carry-=num;} mask>>=1;
				carry=(carry<<1); if(num<=carry){acc|=mask;carry-=num;} mask>>=1;
				carry=(carry<<1); if(num<=carry){acc|=mask;carry-=num;} mask>>=1;
				carry=(carry<<1); if(num<=carry){acc|=mask;carry-=num;} mask>>=1;
				carry=(carry<<1); if(num<=carry){acc|=mask;carry-=num;} mask>>=1;
				carry=(carry<<1); if(num<=carry){acc|=mask;carry-=num;} mask>>=1;
				carry=(carry<<1); if(num<=carry){acc|=mask;carry-=num;} mask>>=1;
				carry=(carry<<1); if(num<=carry){acc|=mask;carry-=num;} mask>>=1;
			}
#else // OPTIMISE
			for (n = BIGBITS; n > 0; n--)
			{
				carry=(carry<<1); if(num<=carry){acc|=mask;carry-=num;} mask>>=1;
			}
#endif // OPTIMISE

			len++;
			dst--;
			*dst = acc;
		}
	}

	return len;

#endif // X86
}

///////////////////////////////////////////////////////////////////////////////
// divide data strings
//   carry = highest word of dividend src1
//   dst = destination, on result it will contain quotient with length (len1 - len2 + 1), dst can be NULL if result not required
//   src1 = source with dividend, length len1, on result it will contain remainder with length len2
//   len1 = length of src1 with dividend (must be len1 >= len2)
//   src2 = source with divisor, length len2
//   len2 = length of src2 with divisor (must be > 2, must be len1 >= len2)
// Divisor must be normalised - it must be shifted left so that its highest bit is 1!
// Divisor must not be longer than the dividend and must be at least 3 entries long.


// !!!!!!!!!!!!!
// qp (dst) = quotient, its length will be nn-dn+1 (len1-len2+1)
// np (src1) = dividend, on output it contains the remainder of length dn
// nn (len1) = length of np (must be nn >= dn)
// n1 (carry) = carry, higher word from the previous operation, at the end it will be the highest word of the remainder
// dp (src2) = divisor
// dn (len2) = length of the divisor (must be > 2)
// dinv

// remainder
// mpn_div_qr_pi1

// d1 = last word of divisor dp (its highest bit must be 1, i.e. it uses the full word size!)
// d0 = last but one word of divisor dp
// i = iteration index, the division is <n1 np[dn-1+i] .. np[i]> / <d1 d0 dp[dn-3] .. dp[0]>
//   i starts at (nn - dn) and ends at 0

// NOTE(review): this function is UNFINISHED. Only the special case "highest 2
// words of the current dividend equal the highest 2 words of the divisor" is
// recognised; the general quotient estimation and the subtraction of
// (quotient * divisor) from the dividend are missing, so neither the quotient
// nor the remainder in src1 is valid yet.
//   carry = highest word of the dividend
//   dst = destination of quotient words (index 0 .. len1-len2), can be NULL
//   src1 = dividend, len1 words; only src1[len2-1] is written at the end
//   src2 = divisor, len2 words; its 2 highest words are read

void bignum::Div2Str(buint carry, buint* dst /* = NULL */, buint* src1, bint len1, const buint* src2, bint len2)
{
	// prepare highest 2 words of divisor
	buint d1 = src2[len2 - 1];
	buint d0 = src2[len2 - 2];

	// loop through iterations, starting with iteration index at highest word of quotient
	bint inx;
	buint n0, q;
	for (inx = len1 - len2; inx >= 0; inx--)
	{
		// get highest word of current dividend (1st highest word is carry)
		n0 = src1[len2 + inx - 1]; // index len1-1 .. len2-1

		// if highest 2 words of dividend and divisor are equal
		if ((carry == d1) && (n0 == d0))
		{
			q = BIGMAX; // quotient will be all 1's

			// TODO: subtract q * divisor from the dividend before taking the new carry
			carry = src1[len2 - 1 + inx];
		}
		else
		{
			// TODO: general case is not implemented yet (estimate the quotient
			// word and subtract q * divisor from the dividend). The quotient
			// word is set to 0 as a placeholder, so that no uninitialised
			// value is stored below.
			q = 0;
		}

		// save quotient
		if (dst != NULL) dst[inx] = q;
	}

	// save last carry to remainder
	src1[len2 - 1] = carry;
}
